program ranktest, rclass
	version 13
	local version
	/* 
	This program tests rank invariance or rank similarity
	example syntax is:
	ranktest y d z s, graph generate(rankbin freqs) bounds taulist(25 50 75) boundvars(uhat qdelta delta)
	It can also construct bounds on the treatment effects
	If one wants to graph the bounds, follow up the command with something like:
	foreach tau in `taulist' {
		twoway rarea qdeltaub`tau' qdeltalb`tau' uhat
	}
	for each tau in taulist, and
	twoway (line qdelta deltaub) (line qdelta deltalb)
	*/
	syntax varlist(min=4 max=4 numeric) [if] [in],[GRaph GENerate(namelist min=2 max=2)] ///
	[BOUNDS TAUlist(numlist <100 >0 sort) BOUNDVARS(namelist min=3 max=3)]
	marksample touse
	
	* initialize temporary names, variables, and files:
	tempname h ts theta
	tempvar d0 z0 ds kappa0 kappa1 uhat bins u1imp sspline yspline ///
		ypz fushat fushatz wuj fushatall nd topick picked
	tempfile scratch
	tokenize `varlist'
	local y "`1'"
	macro shift
	local d "`1'"
	macro shift
	local z "`1'"
	macro shift
	local s "`1'"
	macro shift
	
	tokenize `generate'
	local rankbin "`1'"
	local freqs "`2'"
	if "`bounds'"=="bounds" {
		tokenize `boundvars'
		local uhat "`1'"
		local qdelta "`2'"
		local delta "`3'"
		
	}
	preserve
	qui keep if `touse'
	local n=_N
	local d1 `d'
	qui gen `d0'=1-`d'
	local z1 `z'
	qui gen `z0'=1-`z'
	qui gen `ds'=`d'*`s'
	qui gen `uhat'=.
	mata y=.
	mata kappa=.
	mata d=.
	foreach dval in 1 0 {
		qui sum `z`dval''
		local tau`dval'=r(mean)
		* generate Abadie's kappa
		gen `kappa`dval''=1-`d`dval''*(1-`z`dval'')/(1-`tau`dval'')-(1-`d`dval'')*`z`dval''/`tau`dval''
		
		* create xkx matrix for use later:
		mata st_view(y,.,"`y'")
		mata st_view(kappa,.,"`kappa`dval''")
		mata st_view(d,.,"`d`dval''")
		mata x=d,J(rows(d),1,1)
		mata xkx=cross(x,kappa,x)
		mata xkxinv=invsym(xkx)
		*generate ranks by estimated cdf of Y(`dval') at each outcome observation
		mata yifd = st_data(.,"`y'","`d`dval''")
		mata w=J(rows(y),rows(yifd),.)
		mata for (j=1;j<=rows(yifd);j++) w[.,j]=(y:<=yifd[j]):*d
		mata betas=xkxinv*cross(x,kappa,w)
		mata Fifd=betas[1,.]
		mata Fifd=Fifd'
		mata st_store(.,"`uhat'","`d`dval''",Fifd)
	}
	
	* correct sample ranks for ties (this only accounts for a mass point at zero)
	qui sum `y'
	if r(min)==0 {
		replace `uhat'=`uhat'/2 if `y'==0
	}
	* now do Abadie-kappa-weighted least squares on uhats
	abadiels `uhat' `d' `z' `s' `ds'
	mat `theta'=e(b)
	mat `theta'=`theta''
	*mat `h'=(1,0,0,0\0,0,1,0)
	* I actually only want to test the interaction coefficient
	mat `h'=(0,0,1,0)
	mat `h'=`h''
	mat `ts'=`theta''*`h'*invsym(`h''*e(V)*`h')*`h''*`theta'
	local deltastat=`ts'[1,1]
	local pval=1-chi2(1,`deltastat')
	
	if "`graph'"=="graph" {
		mata output=histograms("`uhat' `kappa1' `d' `s'")
		mata st_matrix("`freqs'",*output[1])
		local cols=colsof(`freqs')
		mata labels=*output[2]
		mata st_local("numcats",strofreal(*output[3]))
		restore
		svmat `freqs'
		local labels
		local cols=colsof(`freqs')
		forvalues i=1/`cols' {
			mata st_varlabel("`freqs'`i'",labels[`i'])
			mata st_local("labeli",labels[`i'])
			local labels `"`labels' "`labeli'""'
		}
		tempname gmargin
		
		graph bar (asis) `freqs'1-`freqs'`numcats',over(`freqs'`cols') title("Distribution of Ranks by `s'") name(`gmargin',replace)
		local glist `gmargin'
		forvalues j=1/`numcats' {
			local untreated=`numcats'+`j'
			local treated=2*`numcats'+`j'
			tempname g`j'
			local label: var label `freqs'`untreated'
			local subtitle=substr("`label'",1,strpos("`label'",",")-1)
			graph bar (asis) `freqs'`untreated' `freqs'`treated',over(`freqs'`cols') title("Distribution of Ranks by `s'") subtitle("`subtitle'") name(`g`j'',replace) legend(label(1 "`d' = 0") label(2 "`d' = 1"))
			local glist `glist' `g`j''
		}
		rename `freqs'`cols' `rankbin'
		*graph combine `glist'
		return matrix freqs=`freqs'
		return local labels `"`labels'"'
	}
	else restore
	if "`bounds'" == "bounds" {
		* "
		keep if `touse'
		preserve
		sort `d'
		by `d': egen `uhat'=rank(`y'),track
		by `d': egen `nd'=count(`y')
		qui replace `uhat'=`uhat'/`nd'
		* construct bounds on individual-level treatment effect distribution"
		foreach dval in 0 1 {
			qui save `scratch',replace
			collapse y`dval'=`y' if `d'==`dval',by(`uhat')
	
			tempfile d`dval's
			qui save `d`dval's'
			use `scratch',clear
		}
		
		sort `d'
		qui sum if `d'==0
		local n0=r(N)
		* choose a random sample of d==0 to evaluate cdf bounds
		gen `topick'=uniform()
		sort `d' `topick'
		local numtopick=min(`n0',`n0')
		gen byte `picked'=_n<=`numtopick'
		* now do distribution regressions over a grid
		* but it will have to be an absolute grid, not a centile
		* grid.
		* create grid:
		foreach dval in 0 1 {
			qui sum `y' if `d'==`dval'
			local max`dval'=r(max)
			local min`dval'=r(min)
		}
		local gridmax=max(`max0',`max1')
		local gridmin=min(`min0',`min1')
		local gridn=25
		local gridstep=(`gridmax'-`gridmin')/`gridn'
		local x0=`gridmin'
		forvalues j=1/`gridn' {
			local x`j'=`gridmin'+`j'*`gridstep'
		}

		mkspline `sspline'=`s',cubic nknots(5)
		mkspline `yspline'=`y' if `d'==0,cubic nknots(5)
		
		
		* now I can calculate bounds of the cdf of delta at any z
		* zlabel is going to run from 0 to 2*gridn
		local numzlabels=2*`gridn'
		* define zindex in terms of gridsteps
		forvalues zlabel=0/`numzlabels' {
			*display "on z-step `zlabel' of `numzlabels'"
			local zind= `zlabel'-`gridn'
			local zval`zlabel'=`zind'*`gridstep'
			* total number of possible grid point comparisons:
			*local jstart=max(0,`zind')
			*local jend=min(`gridn',`gridn'+`zind')
			*forvalues j=`jstart'/`jend' {
			* now compute our bounds
			* first get the imputed treated rank of Y(0) + z in the
			* treated distribution
			qui gen `ypz'=`y'+`zval`zlabel'' if `d'==0
			qui save `scratch',replace
			qui keep if `d'==0
			keep `ypz'
			qui duplicates drop
			rename `ypz' y1
			qui merge 1:1 y1 using `d1s'
			ipolate `uhat' y1 ,generate(`u1imp') epolate
			qui replace `u1imp'=max(0,min(1,`u1imp'))
			qui keep if _merge==3 | _merge==1
			keep y1 `u1imp'
			rename y1 `ypz'
			tempfile uimps
			qui save `uimps'
			use `scratch',clear
			qui merge m:1 `ypz' using `uimps'
			drop _merge
			* Now, get estimates of the rank cdf conditional on S evaluated at each 
			* untreated rank and each imputed treated rank(z)
			qui gen `fushat'=.
			qui gen `fushatz'=.
			*sort `d' `y'
			
			forvalues j=1/`numtopick' {
				*display "on z-step `zlabel' of `numzlabels', j = `j' of `n0'"
				gen `wuj'=`uhat'<=`uhat'[`j']
				qui reg `wuj' `sspline'*
				qui predict `fushatall'
				qui replace `fushat'=max(0,min(1,`fushatall')) in `j'
				drop `fushatall'
				qui replace `wuj'=`uhat'<=`u1imp'[`j']
				qui reg `wuj' `sspline'*
				qui predict `fushatall'
				qui replace `fushatz'=max(0,min(1,`fushatall')) in `j'
				drop `wuj' `fushatall'	
				*display "z = `zval`zlabel''"
				*li `fushat' `fushatz' in `j'
			}
			tempvar deltaylb`zlabel' deltayub`zlabel'
			qui gen `delta'lb`zlabel'=max((`fushatz'-`fushat')/(1-`fushat'),0) if `picked'==1
			qui gen `delta'ub`zlabel'=min(`fushatz'/`fushat',1) if `picked'==1
			
			* integrate over s to get by y0 
			qui reg `delta'lb`zlabel' `yspline'*
			qui predict `deltaylb`zlabel''
			qui reg `delta'ub`zlabel' `yspline'*
			qui predict `deltayub`zlabel''
			drop `fushat' `fushatz' `ypz' `u1imp' 
			
			* Here I'm going to integrate over y0 to get bounds by S,
			* and then impose exchangeability, and then finally integrate
			* over S to get global bounds
			
		}
		qui save `scratch',replace
		qui keep if `picked'==1
		keep `uhat' 
		tempfile qdeltafile
		qui save `qdeltafile',replace
		use `scratch',clear
		sort `y'
		foreach tau in `taulist' {
			qui gen `qdelta'ub`tau'=.
			qui gen `qdelta'lb`tau'=.
			forvalues zlabel=`numzlabels'(-1)0 {
				qui replace `qdelta'lb`tau'=`zval`zlabel'' if `deltayub`zlabel''>=`tau'/100 & `picked'==1
				qui replace `qdelta'ub`tau'=`zval`zlabel'' if `deltaylb`zlabel''>=`tau'/100 & `picked'==1
			}
			twoway rarea `qdelta'ub`tau' `qdelta'lb`tau' `uhat', ///
			title("Bounds on the `tau'-th percentile of the treatment effect") subtitle("Frandsen-Lefgren")
			qui save `scratch',replace
			qui keep if `picked'==1
			keep `qdelta'ub`tau' `qdelta'lb`tau' `uhat'
			qui merge m:m `uhat' using `qdeltafile'
			drop _merge
			qui save `qdeltafile',replace
			use `scratch',clear
		}
		
		
		* now let's graph quantiles of the treatment effect
		collapse `delta'ub* `delta'lb*
		gen id=_n
		qui reshape long `delta'ub `delta'lb, i(id) j(zlabel)
		qui gen zval=.
		forvalues zlabel=0/`numzlabels' {
			qui replace zval=`zval`zlabel'' if zlabel==`zlabel'
		}
		rename zval `qdelta'
		twoway (line `qdelta' `delta'ub) (line `qdelta' `delta'lb),title("bounds on quantiles of the treatment effect") subtitle("Frandsen-Lefgren")	
		keep `qdelta' `delta'ub `delta'lb
		tempfile deltafile
		qui save `deltafile'
		restore
		qui merge 1:1 _n using `qdeltafile'
		drop _merge
		qui merge 1:1 _n using `deltafile'
		drop _merge
	}
	
								
	return scalar pval=`pval'
	return scalar delta=`deltastat'
	local pvalstr="p-value = " + string(`pval',"%5.2f") 
	local tsstr="test statistic = " +string(`deltastat',"%5.3f")
	display _n
	display "Rank Similarity Test Results" _n
	display "`tsstr'" _n
	display "`pvalstr'"
	
end

* mata programs
mata:
	pointer vector histograms(string scalar varlist)
{ // takes as arguments uhat, kappa, D, S 
// produced kappa-weighted histogram frequences of ranks. The output
// is a matrix with 10 rows (one for each rank decile) and 1+3*J columns,
// where J is the number of support points in S..
// The first column gives the decile rank: .1(.1)1. The next J
// gives fractions conditional on S=s1..S=sJ 
// The next J give fractions condition on D=0,S=s1..D=0,S=sJ, and
// the last J give fractions conditional on D=1, S=s1..D=1,S=sJ
	
	vars=tokens(varlist)
	uhat=st_data(.,vars[1])
	kappa=st_data(.,vars[2])
	d=st_data(.,vars[3])
	s=st_data(.,vars[4])
	bins=(1::10)/10
	inbins=J(rows(uhat),rows(bins),.)
	inbins[.,1]=(0:<uhat):*(uhat:<=bins[1])
	for (i=2;i<=rows(bins);i++) {
		inbins[.,i]=(bins[i-1]:<uhat):*(uhat:<=bins[i])
	}
	svals=uniqrows(s)
	message = vars[4]
	// if number of uniq svals is more than 3, reduce to dummy above/below median
	if (rows(svals) > 3) {
		s=s:>svals[floor(rows(svals)/2)]
		svals=uniqrows(s)
		message=vars[4]+">median("+vars[4]+")"
	}
	freqs=(J(rows(bins),3*rows(svals),.),bins)
	labels=(J(1,3*rows(svals),""),"rank")
	for (j=1;j<=rows(svals);j++) {
		// conditional on S=sj
		labels[j]=message+" = "+strofreal(svals[j])
		m=colsum(kappa:*inbins:*(s:==svals[j]))/sum(kappa:*(s:==svals[j]))
		freqs[.,j]=m'
		// conditional on S=sj,d=0
		labels[rows(svals)+j]=message+" = "+strofreal(svals[j])+", "+vars[3]+"=0"
		m=colsum(kappa:*inbins:*(s:==svals[j]):*(d:==0))/sum(kappa:*(s:==svals[j]):*(d:==0))
		freqs[.,rows(svals)+j]=m'
		// conditional on S=sj,d=1
		labels[2*rows(svals)+j]=message+" = "+strofreal(svals[j])+", "+vars[3]+"=1"
		m=colsum(kappa:*inbins:*(s:==svals[j]):*(d:==1))/sum(kappa:*(s:==svals[j]):*(d:==1))
		freqs[.,2*rows(svals)+j]=m'
	}
	return((&freqs,&labels,&rows(svals)))
}

end


